
#### HOUSEKEEPING ####

# Clear the global environment so the script starts from a clean workspace.
# NOTE(review): this also wipes any objects in an interactive session that
# sources this file.
rm(list = ls())

## PACKAGES
package_list <- c("data.table", "tidyverse")
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with an unrelated message.
# invisible() keeps the list returned by lapply() from being printed.
invisible(lapply(package_list, library, character.only = TRUE))

## DIRECTORIES
# Presumably defines `dat_dir`, which is used below but not defined in this
# file -- confirm against R/00_paths.R.
source("R/00_paths.R")

## MASKED PARAMETERS
# These values are masked in this copy of the script and must be filled in
# before running. They are compared against month(date) below, so they are
# numeric month-of-year values.
start_announce_mon <- NA # first month of announcement
end_announce_mon <- NA # last month of announcement
end_treat_mon <- NA # last month of treatment period

# Fail fast: with NA parameters every month comparison below evaluates to NA,
# every filter drops all rows, and an empty output file is written silently.
if (anyNA(c(start_announce_mon, end_announce_mon, end_treat_mon))) {
  stop(
    "Set start_announce_mon, end_announce_mon and end_treat_mon ",
    "before running this script.",
    call. = FALSE
  )
}

#### BUILD SUBSCRIBER-LEVEL SET OF PRE AND POST OUTCOMES ####

#### ~ LOAD DAILY DATA ####

# Read the daily input file from the data directory.
# NOTE(review): presumably one row per subscriber and date -- confirm.
sd_df <- paste0(dat_dir, "subdate_final.csv") %>%
  fread(file = .)

#### ~ PRE AND POST USAGE ####

# Usage measures averaged within each customer-period, in output order.
usage_cols <- c(
  "tot_gb", "gb_video", "gb_browsing", "gb_other",
  "gb_netflix", "gb_youtube", "gb_hulu", "gb_slingtv"
)

# One row per customer with mean usage before the announcement window (st_*),
# after it (en_*), and the post-minus-pre change (del_*).
usage_df <- sd_df %>%
  # Period 1 = before the announcement window, 2 = during, 3 = after.
  # NOTE(review): periods are keyed on month-of-year only, which presumably
  # assumes the sample does not wrap across calendar years -- confirm.
  mutate(
    month = month(date),
    period = case_when(
      month < start_announce_mon ~ 1,
      month > end_announce_mon ~ 3,
      TRUE ~ 2
    )
  ) %>%
  filter(period %in% c(1, 3)) %>%
  mutate(
    gb_video = gb_youtube + gb_netflix + gb_slingtv + gb_hulu + gb_othervideo,
    gb_other = tot_gb - gb_video - gb_browsing
  ) %>%
  # Mean of each usage measure within customer-period (NA values propagate,
  # as no na.rm is applied).
  group_by(customer_key, period) %>%
  summarise(across(all_of(usage_cols), mean), .groups = "drop") %>%
  # Pick the pre and post means by period value rather than by row position:
  # a customer observed in only one period gets NA for the missing side
  # instead of st == en (and a spurious delta of zero).
  group_by(customer_key) %>%
  summarise(
    across(all_of(usage_cols), ~ .x[match(1, period)], .names = "st_{.col}"),
    across(all_of(usage_cols), ~ .x[match(3, period)], .names = "en_{.col}"),
    .groups = "drop"
  ) %>%
  mutate(
    del_gb = en_tot_gb - st_tot_gb,
    del_video = en_gb_video - st_gb_video,
    del_browsing = en_gb_browsing - st_gb_browsing,
    del_other = en_gb_other - st_gb_other,
    del_netflix = en_gb_netflix - st_gb_netflix,
    del_youtube = en_gb_youtube - st_gb_youtube,
    del_hulu = en_gb_hulu - st_gb_hulu,
    del_slingtv = en_gb_slingtv - st_gb_slingtv
  ) %>%
  select(
    customer_key,
    starts_with("del_"),
    starts_with("st_"),
    starts_with("en_")
  )

#### ~ TIER CHANGES BETWEEN END OF PRE AND END OF POST ####

# One row per customer: service tier and video flag on the last observed day
# of end_announce_mon (st_*) and of end_treat_mon (en_*), plus 0/1 indicators
# for changes between the two.
tier_df <- sd_df %>%
  mutate(month = month(date)) %>%
  filter(month == end_announce_mon | month == end_treat_mon) %>%
  # Keep the latest-dated row of each customer-month.
  arrange(date) %>%
  group_by(customer_key, month) %>%
  slice(n()) %>%
  ungroup() %>%
  # Select each endpoint by its month rather than by row position: a customer
  # present in only one of the two months gets NA for the missing endpoint
  # instead of st == en (which would be reported as "no change").
  group_by(customer_key) %>%
  summarise(
    st_vid = vid_flag[match(end_announce_mon, month)],
    en_vid = vid_flag[match(end_treat_mon, month)],
    st_tier = svc_tier[match(end_announce_mon, month)],
    en_tier = svc_tier[match(end_treat_mon, month)],
    .groups = "drop"
  ) %>%
  # Indicators are NA when either endpoint is missing.
  mutate(
    upg_tier = as.numeric(en_tier > st_tier),
    dng_tier = as.numeric(en_tier < st_tier),
    add_vid = as.numeric(en_vid > st_vid),
    drop_vid = as.numeric(en_vid < st_vid)
  ) %>%
  select(
    customer_key, upg_tier, dng_tier, add_vid, drop_vid,
    st_tier, st_vid, en_tier, en_vid
  )

#### ~ MONTHLY BILLING OUTCOMES ####

# Customer-by-month outcomes from the first announcement month onward: mean
# tot_gb over the month's rows, plus the video flag and service tier on the
# latest-dated row of the month.
sm_df <- sd_df %>%
  mutate(month = month(date)) %>%
  filter(month >= start_announce_mon) %>%
  group_by(customer_key, month) %>%
  # The mean is taken before sorting so it sees rows in their input order.
  mutate(tot_gb = mean(tot_gb)) %>%
  # arrange() sorts the whole table by date and keeps the grouping, so the
  # last element of each group below is its latest-dated row.
  arrange(date) %>%
  mutate(
    en_vid_m = vid_flag[n()],
    en_tier_m = svc_tier[n()]
  ) %>%
  ungroup() %>%
  select(customer_key, month, tot_gb, en_vid_m, en_tier_m) %>%
  # Every row of a customer-month now carries the same values; collapse them.
  distinct()

## PIVOT 

# Reshape each monthly outcome to wide form: one row per customer and one
# column per month, named <prefix><month>. id_cols restricts the identifier
# to customer_key, so the other outcome columns are dropped from each table.

# Mean monthly usage: tot_gb_<month>
sm_gb_df <- pivot_wider(
  sm_df,
  id_cols = customer_key,
  names_from = month,
  values_from = tot_gb,
  names_prefix = "tot_gb_"
)

# End-of-month video flag: en_vid_<month>
sm_vid_df <- pivot_wider(
  sm_df,
  id_cols = customer_key,
  names_from = month,
  values_from = en_vid_m,
  names_prefix = "en_vid_"
)

# End-of-month service tier: en_tier_<month>
sm_tier_df <- pivot_wider(
  sm_df,
  id_cols = customer_key,
  names_from = month,
  values_from = en_tier_m,
  names_prefix = "en_tier_"
)

# Combine the three wide monthly tables. The join key is stated explicitly so
# the merge cannot silently pick up extra key columns if names ever overlap.
bill_df <- sm_gb_df %>%
  left_join(sm_vid_df, by = "customer_key") %>%
  left_join(sm_tier_df, by = "customer_key")

# Customer-level output: usage changes, tier/video changes, monthly outcomes.
# Left joins keep exactly the customers present in usage_df.
final_df <- usage_df %>%
  left_join(tier_df, by = "customer_key") %>%
  left_join(bill_df, by = "customer_key")

fwrite(x = final_df, file = paste0(dat_dir, "ubp_outcomes.csv"))

